* Firm organization with multiple establishments
* Appendix A: Data preparation for Tables A.2-A.9; A.11-A.13

clear all 
set matsize 2000
set more off

capture log close
log using log/25_cmrh-p_replication_firm_data.log, replace

use data/Panel.dta if jahr >= 2000 & jahr <= 2010, clear
desc

//	Drop employee characteristics that are not needed in analyses
drop grund stib frau foreign ein_erw nbr_occ tage_wz tage_occ ///
	l_beruf f_beruf l_tentgelt f_tentgelt d_age

//	Drop firm characteristics that are not needed in analyses
drop mean_wage verysmall
drop grd_dat lzt_dat ao_bula
order limit cens lnw, after(tentgelt)
	
********************************************************************************
***	Sample restriction *********************************************************
********************************************************************************

*	Minimum firm size
bys untid jahr: egen empl_unt = count(persnr)
by  untid: egen min_empl = min(empl_unt)
qui keep if min_empl >= 10
drop min_empl

*	Cleaning: social security limit
qui replace tentgelt = limit if cens == 1
drop limit cens
qui replace lnw = ln(tentgelt) 		
count if lnw == .

********************************************************************************
***	Establishment characteristics **********************************************
********************************************************************************

bys betnr jahr: egen empl_bet = count(persnr)

merge m:1 betnr jahr using data/BHPinclUntID.dta
qui keep if _merge == 3
drop _merge

drop count_est_siab chge_est entry_unt jahr_eins_final jahr_eins_est az_vz eintritt ///
	betnr_vor untid_vor verysmall matchtype d_multest_4 ever_multest_4 always_multest_4 vorg_died ///
	mode austritt betnr_nach nachf_willbenew untid_nach vorgaenger nachfolger ///
	erst_jahr_eintritt jahr_lzt_est first_year_ME entry_dyn exit_dyn exit_unt add_est mode_unt sector sector2 hq_sector

order hq_kreis, after(ao_bula)

********************************************************************************
***	Layer classification based on KldB1988, managerial organization ************
********************************************************************************

tab layer, missing

bys untid jahr: egen lowest = min(layer)
bys untid jahr: egen second_lowest = min(layer) if layer > lowest
bys untid jahr: egen third_lowest = min(layer) if layer > second_lowest
bys untid jahr: egen highest = min(layer) if layer > third_lowest

gen layer_rank = .
replace layer_rank = 0 if layer == lowest
replace layer_rank = 1 if layer == second_lowest & layer_rank == .
replace layer_rank = 2 if layer == third_lowest & layer_rank == .
replace layer_rank = 3 if layer == highest & layer_rank == .
drop lowest *_lowest highest

tab2 layer_rank layer, missing

// Number of layers
egen flg_untjhrlay = tag(untid jahr layer)
bys untid jahr: egen count_lay_unt = total(flg_untjhrlay)
drop flg_untjhrlay

// Combination of layers
sort untid jahr
qui {
	forvalues l = 0/3 {
		egen e_`l' = anymatch(layer), values(`l')
		by untid jahr: egen count_layer`l' = sum(e_`l')
		by untid jahr: gen l_`l' = (count_layer`l' > 0)
		label variable l_`l' "Dummy firm has layer `l'"
		capture drop e_`l' count_layer`l'
	}
}

capture drop comb_layer_unt
gen comb_layer_unt = l_3 * 1000 + l_2 * 100 + l_1 * 10 + l_0 
label variable comb_layer_unt "Pattern of layers, from high to low, firm level"
label define structure 1 "0" 10 "1" 100 "2" 1000 "3" 11 "0 + 1" 101 "0 + 2" 1001 "0 + 3" ///
	110 "1 + 2" 1010 "1 + 3" 1100 "2 + 3" 111 "0 + 1 + 2" 1101 "0 + 2 + 3" 1011 "0 + 1 + 3" ///
	1110 "1 + 2 + 3" 1111 "All layers"
label values comb_layer_unt structure
drop l_*

//	Number of managerial layers
egen flg_untjhrlay = tag(untid jahr layer_rank)
qui replace flg_untjhrlay = 0 if layer_rank == 0
bys untid jahr: egen count_mgmt_unt = total(flg_untjhrlay)
drop flg_untjhrlay

********************************************************************************
***	Save individual-level data *************************************************
********************************************************************************

preserve

cap drop lnw
gen lnw = log(tentgelt)
*	Multi-establishment firms
egen flg_estjhr = tag(betnr jahr)
bys untid jahr: egen count_est = total(flg_estjhr)

keep persnr betnr jahr tentgelt lnw d_educ tage_bet layer layer_rank count_est hauptbet w08_5 beruf*
save "data/CMRH_2000-2010_p_layer-ind.dta", replace

restore

********************************************************************************
***	Firm characteristics *******************************************************
********************************************************************************

// Other sums
bys untid jahr: egen tot_wages = total(tentgelt)
bys untid jahr: egen tot_educ = total(d_educ)
bys untid jahr: egen tot_tenure = total(tage_bet)

*	Multi-establishment firms
egen flg_estjhr = tag(betnr jahr)
bys untid jahr: egen count_est = total(flg_estjhr)
by  untid: egen ever_mbu = max(count_est)

// Reduce to firm-level variables
preserve
keep untid jahr empl_unt tot_* count_lay_unt comb_layer_unt count_mgmt_unt count_est

duplicates drop
duplicates report untid jahr
tab jahr, missing
label variable empl_unt "# of employees in firm"
label variable tot_wages "Wage sum in firm"
label variable tot_educ "Education sum in firm"
label variable count_lay_unt "# of layers in firm"
label variable count_mgmt_unt "# of mgmt layers in firm"
describe

save data/firmlevel_CMRH.dta, replace
restore

********************************************************************************
***	Hierarchy characteristics **************************************************
********************************************************************************

bys untid jahr layer_rank: egen empl_lyr = count(persnr)
bys untid jahr layer_rank: egen wage_lyr = total(tentgelt)
bys untid jahr layer_rank: egen educ_lyr = total(d_educ)
bys untid jahr layer_rank: egen tenure_lyr = total(tage_bet)

egen flg_untjhrlay = tag(untid jahr layer_rank)
keep if flg_untjhrlay == 1
drop flg_untjhrlay

keep wage_lyr empl_lyr educ_lyr tenure_lyr jahr untid layer_rank
reshape wide wage_lyr empl_lyr educ_lyr tenure_lyr, i(jahr untid) j(layer_rank)

********************************************************************************
***	Combine data ***************************************************************
********************************************************************************

merge 1:1 untid jahr using data/firmlevel_CMRH.dta
drop if _merge == 2
drop _merge

capture drop flg_unt
capture drop flg_untjhr
capture drop flg_estjhr

egen flg_unt = tag(untid)
egen flg_untjhr = tag(untid jahr)

xtset, clear
sort untid jahr
xtset untid jahr

merge m:1 untid jahr using data/Amadeus_Aug2018_untid.dta
qui drop if _merge == 2
drop _merge
drop NACE Empl OperRev ValueAdded
drop ln_sales ln_operrev

cap drop flg_untjhr
egen flg_untjhr = tag(untid jahr)


************************************
** Other supporting variables  *****
************************************
gen avg_wage=tot_wages/empl_unt

*marking firms with adjacent (consecutively ordered) layers
gen correctlyr= (comb_layer_unt == 1 | comb_layer_unt ==11  | comb_layer_unt ==111 | comb_layer_unt ==1111)
*keep if correctlyr==1 	// remove the asterisk and let this instruction be executed to 
						// produce tables with consecutively ordered layers.
						
*average wage and skill by layer
foreach v in wage educ tenure {
	forvalues l=0/3 {
		gen avg_`v'_lyr`l'=`v'_lyr`l'/empl_lyr`l'
		
	}
}

*log sales
gen ln_sales = log(Sales)

*log employees
gen ln_empl_unt=log(empl_unt)

*log average wage
gen lavgw=log(avg_wage)

sort untid jahr
*number of layers next period
gen lnext = F.count_mgmt_unt

*number of layers in two periods
gen lnext2=F2.count_mgmt_unt

*setting them to -1 if the firm exits
replace lnext=-1 if lnext==.
replace lnext2=-1 if lnext2==.

*next organizational type, and its missing values
gen comb_layer_unt_next=F.comb_layer_unt
replace comb_layer_unt_next=-1 if comb_layer_unt_next==.

sort untid jahr

*average wage, skills in pre-existing layers and their log change
*****************************************************************
gen avg_wage_notop_next=.
gen avg_wage_notop_bef=.
gen dlog_notop=.
**if firms are adding layers
replace avg_wage_notop_next=(F.wage_lyr0+F.wage_lyr1+F.wage_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_unt==2 & lnext>2
replace avg_wage_notop_next=(F.wage_lyr0+F.wage_lyr1)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_unt==1 & lnext>1
replace avg_wage_notop_next=(F.wage_lyr0)/(F.empl_lyr0) if count_mgmt_unt==0 & lnext>0
replace dlog_notop=log(avg_wage_notop_next)-log(avg_wage) if count_mgmt_unt<lnext
count if dlog_notop~=. & lnext==-1
*if firms are dropping layers
replace avg_wage_notop_bef=(wage_lyr0+wage_lyr1+wage_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_unt>2 & lnext==2
replace avg_wage_notop_bef=(wage_lyr0+wage_lyr1)/(empl_lyr0+empl_lyr1) if count_mgmt_unt>1 & lnext==1
replace avg_wage_notop_bef=(wage_lyr0)/(empl_lyr0) if count_mgmt_unt>0 & lnext==0
replace dlog_notop=log(F.avg_wage)-log(avg_wage_notop_bef) if count_mgmt_unt>lnext
count if dlog_notop~=. & lnext==-1
*if the firm doesn't change layers
replace dlog_notop=log(F.avg_wage)-log(avg_wage) if lnext==count_mgmt_unt

*counting whether we are covering all observed transitions	
count if dlog_notop==. & lnext~=-1

*changes in skills in pre-existing layers
foreach variable in educ tenure {
		gen avg_`variable'=tot_`variable'/empl_unt
		gen avg_`variable'_notop_next=.
		gen avg_`variable'_notop_bef=.
		gen dlog_`variable'_notop=.
		**if firms are adding layers
		replace avg_`variable'_notop_next=(F.`variable'_lyr0+F.`variable'_lyr1+F.`variable'_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_unt==2 & lnext>2
		replace avg_`variable'_notop_next=(F.`variable'_lyr0+F.`variable'_lyr1)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_unt==1 & lnext>1
		replace avg_`variable'_notop_next=(F.`variable'_lyr0)/(F.empl_lyr0) if count_mgmt_unt==0 & lnext>0
		replace dlog_`variable'_notop=log(avg_`variable'_notop_next)-log(avg_`variable') if count_mgmt_unt<lnext
		count if dlog_`variable'_notop~=. & lnext==-1
		*if firms are dropping layers
		replace avg_`variable'_notop_bef=(`variable'_lyr0+`variable'_lyr1+`variable'_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_unt>2 & lnext==2
		replace avg_`variable'_notop_bef=(`variable'_lyr0+`variable'_lyr1)/(empl_lyr0+empl_lyr1) if count_mgmt_unt>1 & lnext==1
		replace avg_`variable'_notop_bef=(`variable'_lyr0)/(empl_lyr0) if count_mgmt_unt>0 & lnext==0
		replace dlog_notop=log(F.avg_`variable')-log(avg_`variable'_notop_bef) if count_mgmt_unt>lnext
		count if dlog_`variable'_notop~=. & lnext==-1
		*if the firm doesn't change layers
		replace dlog_`variable'_notop=log(F.avg_`variable')-log(avg_`variable') if lnext==count_mgmt_unt

		*counting whether we are covering all observed transitions	
		count if dlog_`variable'_notop==. & lnext~=-1
	}

* generating normalized empl
gen norm_empl_lyr0=.
gen norm_empl_lyr1=.
gen norm_empl_lyr2=.
gen norm_empl_lyr3=.
forvalues l=0/3 {
		forvalues c=0/`l' {
			gen temp = empl_lyr`c'/empl_lyr`l' if count_mgmt_unt==`l'
			replace norm_empl_lyr`c' = temp if count_mgmt_unt==`l'
			drop temp
		}		
}
egen norm_empl=rowtotal(norm_empl*)

*log changes of layer-level variables
sort untid jahr
foreach v in avg_wage_lyr norm_empl_lyr empl_lyr avg_educ_lyr avg_tenure_lyr {

	forvalues l=0/3 {
		gen log_`v'`l' = log(`v'`l')
		
		gen `v'`l'_next=F.`v'`l'
		gen dlog_`v'`l'=F.log_`v'`l'-log_`v'`l'
		
		*two periods ahead vs. before the change
		gen dlog02_`v'`l'=F2.log_`v'`l'-log_`v'`l'
		*two periods ahead vs. one period ahead
		gen dlog12_`v'`l'=F2.log_`v'`l'-F.log_`v'`l'
		
	}

}

*direction of change
foreach v in Sales empl_unt norm_empl avg_wage {
	gen dir_`v' =.
	replace dir_`v'=1  if F.`v'>`v' & F.`v'~=.
	replace dir_`v'=0  if F.`v'==`v' & F.`v'~=.
	replace dir_`v'=-1 if F.`v'<`v' & `v'~=.
	gen dlog_`v'=log(F.`v')-log(`v')
	count if lnext~=-1 & dir_`v'==.

}

*change in wage at the top
gen dlog_avg_wage_top=.
forvalues b=0/3 {
	forvalues a=0/3 {

		replace dlog_avg_wage_top=log(avg_wage_lyr`a'_next)-log(avg_wage_lyr`b') if count_mgmt_unt==`b' & lnext==`a'
		
	}
}
*change in employment at the top
gen dlog_empl_top=.
forvalues b=0/3 {
	forvalues a=0/3 {

		replace dlog_empl_top=log(empl_lyr`a'_next)-log(empl_lyr`b') if count_mgmt_unt==`b' & lnext==`a'
		
	}
}	

*detrended wage: removing mean wage change by jahr.
gen dlog_wage_detrended=.
forvalues y=1998/2008 {
	sum dlog_avg_wage if jahr==`y'
	replace dlog_wage_detrended=dlog_avg_wage-r(mean) if jahr==`y'
}

*variables for decomposition of changes in average wage
********************************************************

*share of hours in common layers, firms that add layers
gen sharehours_notop_next=.

replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_unt==2 & lnext==3

replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_unt==1 & lnext==2
replace sharehours_notop_next=(F.empl_lyr0+F.empl_lyr1)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_unt==1 & lnext==3

replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1) if count_mgmt_unt==0 & lnext==1
replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2) if count_mgmt_unt==0 & lnext==2
replace sharehours_notop_next=(F.empl_lyr0)/(F.empl_lyr0+F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_unt==0 & lnext==3


*share of hours in common layers, firms that drop layers
gen sharehours_notop_bef=.

replace sharehours_notop_bef=(empl_lyr0+empl_lyr1+empl_lyr2)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_unt==3 & lnext==2

replace sharehours_notop_bef=(empl_lyr0+empl_lyr1)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_unt==2 & lnext==1
replace sharehours_notop_bef=(empl_lyr0+empl_lyr1)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_unt==3 & lnext==1

replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1) if count_mgmt_unt==1 & lnext==0
replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1+empl_lyr2) if count_mgmt_unt==2 & lnext==0
replace sharehours_notop_bef=(empl_lyr0)/(empl_lyr0+empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_unt==3 & lnext==0


*avg_wage in added layers
gen avg_wage_top_next=.

replace avg_wage_top_next=(F.wage_lyr3)/(F.empl_lyr3) if count_mgmt_unt==2 & lnext==3

replace avg_wage_top_next=(F.wage_lyr2)/(F.empl_lyr2) if count_mgmt_unt==1 & lnext==2
replace avg_wage_top_next=(F.wage_lyr2+F.wage_lyr3)/(F.empl_lyr2+F.empl_lyr3) if count_mgmt_unt==1 & lnext==3

replace avg_wage_top_next=(F.wage_lyr1)/(F.empl_lyr1) if count_mgmt_unt==0 & lnext==1
replace avg_wage_top_next=(F.wage_lyr1+F.wage_lyr2)/(F.empl_lyr1+F.empl_lyr2) if count_mgmt_unt==0 & lnext==2
replace avg_wage_top_next=(F.wage_lyr1+F.wage_lyr2+F.wage_lyr3)/(F.empl_lyr1+F.empl_lyr2+F.empl_lyr3) if count_mgmt_unt==0 & lnext==3


*avg_wage in dropped layers
gen avg_wage_top_bef=.

replace avg_wage_top_bef=(wage_lyr3)/(empl_lyr3) if count_mgmt_unt==3 & lnext==2

replace avg_wage_top_bef=(wage_lyr2)/(empl_lyr2) if count_mgmt_unt==2 & lnext==1
replace avg_wage_top_bef=(wage_lyr2+wage_lyr3)/(empl_lyr2+empl_lyr3) if count_mgmt_unt==3 & lnext==1

replace avg_wage_top_bef=(wage_lyr1)/(empl_lyr1) if count_mgmt_unt==1 & lnext==0
replace avg_wage_top_bef=(wage_lyr1+wage_lyr2)/(empl_lyr1+empl_lyr2) if count_mgmt_unt==2 & lnext==0
replace avg_wage_top_bef=(wage_lyr1+wage_lyr2+wage_lyr3)/(empl_lyr1+empl_lyr2+empl_lyr3) if count_mgmt_unt==3 & lnext==0

*checking whether the avg wage implied by these new variables coincides 
*with the correct avg wage, for firms adding layers
gen avg_wage_next=avg_wage_notop_next*sharehours_notop_next+avg_wage_top_next*(1-sharehours_notop_next)
gen diff=avg_wage_next-F.avg_wage
sum diff
replace diff=100*diff/F.avg_wage
sum diff
count if avg_wage_next~=F.avg_wage & avg_wage_next~=.
count if avg_wage_next~=.
drop diff


*checking whether the avg wage implied by these new variables coincides with 
*the correct avg hourly wage, for firms dropping layers
gen avg_wage_bef=avg_wage_notop_bef*sharehours_notop_bef+avg_wage_top_bef*(1-sharehours_notop_bef)
gen diff=avg_wage_bef-avg_wage
sum diff
replace diff=100*diff/avg_wage
sum diff
count if avg_wage_bef~=avg_wage & avg_wage_bef~=.
drop diff


*wage decomposition
gen common=avg_wage_notop_next/avg_wage if count_mgmt_unt<lnext
gen added=avg_wage_top_next/avg_wage if count_mgmt_unt<lnext
drop avg_wage_next

gen common_bef=avg_wage_notop_bef/F.avg_wage if count_mgmt_unt>lnext
gen common_bef1=1/common_bef //to make it comparable to tables 
gen dropped=avg_wage_top_bef/F.avg_wage if count_mgmt_unt>lnext
drop avg_wage_bef

*generating detrended variables
foreach s in avg_wage_lyr norm_empl_lyr empl_lyr avg_educ_lyr avg_tenure_lyr {
		forvalues l=0/3 {
			gen det_dlog_`s'`l'=.
			forvalues y=1998/2010 {
				sum dlog_`s'`l' if jahr==`y'
				replace det_dlog_`s'`l'=dlog_`s'`l'-r(mean) if jahr==`y'
			}
			
		}
	
	}
	
gen det_dlog_avg_wage_top = .
forvalues l = 0/3 {
replace det_dlog_avg_wage_top = det_dlog_avg_wage_lyr`l' if count_mgmt_unt == `l'
}

*generating detrended log_Sales
capture drop det_dlog_Sales
gen det_dlog_Sales=.
forvalues y=2005/2010 {
	sum dlog_Sales if jahr==`y'
	replace det_dlog_Sales=dlog_Sales-r(mean) if jahr==`y'
}


compress 
save "data/CMRH_2000-2010_p_layer-firm.dta", replace

********************************************************************************
********************************************************************************

log close
